In [1]:
import numpy as np
import pandas as pd
In [2]:
import warnings
# Installs a blanket "ignore" filter: from this point on every warning category
# is suppressed, which includes DeprecationWarning / FutureWarning messages.
# NOTE(review): consider narrowing this to specific categories so that API
# deprecations in the libraries used below stay visible.
warnings.filterwarnings('ignore')
In [3]:
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
In [4]:
import os
from pathlib import Path

# Folder that holds the two CSV exports. Set the MENTAL_HEALTH_DATA_DIR
# environment variable to point somewhere else; when it is unset the original
# download folder is used, so existing behaviour is unchanged.
DATA_DIR = Path(os.environ.get("MENTAL_HEALTH_DATA_DIR", r"C:\Users\baluk\Downloads"))

# df1: per-disorder prevalence table; df2: mental-disorder DALY share table.
df1 = pd.read_csv(DATA_DIR / "prevalence-by-mental-and-substance-use-disorder.csv")
df2 = pd.read_csv(DATA_DIR / "mental-and-substance-use-as-share-of-disease (1).csv")
In [5]:
# Preview the first five rows of the prevalence table.
df1.head(n=5)
Out[5]:
Entity Code Year Prevalence - Schizophrenia - Sex: Both - Age: Age-standardized (Percent) Prevalence - Bipolar disorder - Sex: Both - Age: Age-standardized (Percent) Prevalence - Eating disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Drug use disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Depressive disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Alcohol use disorders - Sex: Both - Age: Age-standardized (Percent)
0 Afghanistan AFG 1990 0.228979 0.721207 0.131001 4.835127 0.454202 5.125291 0.444036
1 Afghanistan AFG 1991 0.228120 0.719952 0.126395 4.821765 0.447112 5.116306 0.444250
2 Afghanistan AFG 1992 0.227328 0.718418 0.121832 4.801434 0.441190 5.106558 0.445501
3 Afghanistan AFG 1993 0.226468 0.717452 0.117942 4.789363 0.435581 5.100328 0.445958
4 Afghanistan AFG 1994 0.225567 0.717012 0.114547 4.784923 0.431822 5.099424 0.445779
In [6]:
# Preview the first five rows of the DALY-share table.
df2.head(n=5)
Out[6]:
Entity Code Year DALYs (Disability-Adjusted Life Years) - Mental disorders - Sex: Both - Age: All Ages (Percent)
0 Afghanistan AFG 1990 1.696670
1 Afghanistan AFG 1991 1.734281
2 Afghanistan AFG 1992 1.791189
3 Afghanistan AFG 1993 1.776779
4 Afghanistan AFG 1994 1.712986
In [7]:
# Inner-join the two tables on their identifying columns. The keys are spelled
# out so that the join cannot silently change if either file ever gains another
# column whose name happens to exist in both tables.
data = pd.merge(df1, df2, how='inner', on=['Entity', 'Code', 'Year'])
data.head()
Out[7]:
Entity Code Year Prevalence - Schizophrenia - Sex: Both - Age: Age-standardized (Percent) Prevalence - Bipolar disorder - Sex: Both - Age: Age-standardized (Percent) Prevalence - Eating disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Drug use disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Depressive disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Alcohol use disorders - Sex: Both - Age: Age-standardized (Percent) DALYs (Disability-Adjusted Life Years) - Mental disorders - Sex: Both - Age: All Ages (Percent)
0 Afghanistan AFG 1990 0.228979 0.721207 0.131001 4.835127 0.454202 5.125291 0.444036 1.696670
1 Afghanistan AFG 1991 0.228120 0.719952 0.126395 4.821765 0.447112 5.116306 0.444250 1.734281
2 Afghanistan AFG 1992 0.227328 0.718418 0.121832 4.801434 0.441190 5.106558 0.445501 1.791189
3 Afghanistan AFG 1993 0.226468 0.717452 0.117942 4.789363 0.435581 5.100328 0.445958 1.776779
4 Afghanistan AFG 1994 0.225567 0.717012 0.114547 4.784923 0.431822 5.099424 0.445779 1.712986
In [8]:
# Number of missing values in each column of the merged frame.
data.isna().sum()
Out[8]:
Entity                                                                                               0
Code                                                                                               690
Year                                                                                                 0
Prevalence - Schizophrenia - Sex: Both - Age: Age-standardized (Percent)                             0
Prevalence - Bipolar disorder - Sex: Both - Age: Age-standardized (Percent)                          0
Prevalence - Eating disorders - Sex: Both - Age: Age-standardized (Percent)                          0
Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)                         0
Prevalence - Drug use disorders - Sex: Both - Age: Age-standardized (Percent)                        0
Prevalence - Depressive disorders - Sex: Both - Age: Age-standardized (Percent)                      0
Prevalence - Alcohol use disorders - Sex: Both - Age: Age-standardized (Percent)                     0
DALYs (Disability-Adjusted Life Years) - Mental disorders - Sex: Both - Age: All Ages (Percent)      0
dtype: int64
In [9]:
# Remove the 'Code' column (the only column with missing values in the null
# counts shown earlier). Rebinding instead of dropping in place, together with
# errors='ignore', keeps this cell safe to re-run: a second execution no longer
# raises KeyError because 'Code' is already gone.
data = data.drop(columns='Code', errors='ignore')
data.head()
Out[9]:
Entity Year Prevalence - Schizophrenia - Sex: Both - Age: Age-standardized (Percent) Prevalence - Bipolar disorder - Sex: Both - Age: Age-standardized (Percent) Prevalence - Eating disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Drug use disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Depressive disorders - Sex: Both - Age: Age-standardized (Percent) Prevalence - Alcohol use disorders - Sex: Both - Age: Age-standardized (Percent) DALYs (Disability-Adjusted Life Years) - Mental disorders - Sex: Both - Age: All Ages (Percent)
0 Afghanistan 1990 0.228979 0.721207 0.131001 4.835127 0.454202 5.125291 0.444036 1.696670
1 Afghanistan 1991 0.228120 0.719952 0.126395 4.821765 0.447112 5.116306 0.444250 1.734281
2 Afghanistan 1992 0.227328 0.718418 0.121832 4.801434 0.441190 5.106558 0.445501 1.791189
3 Afghanistan 1993 0.226468 0.717452 0.117942 4.789363 0.435581 5.100328 0.445958 1.776779
4 Afghanistan 1994 0.225567 0.717012 0.114547 4.784923 0.431822 5.099424 0.445779 1.712986
In [10]:
# Total number of cells, followed by the (rows, columns) shape.
(data.size, data.shape)
Out[10]:
(68400, (6840, 10))
In [11]:
# Replace the long source headers with short names, positionally (same
# left-to-right order as the merged frame after 'Code' was dropped).
# Assigning to `.columns` is used instead of `set_axis(..., inplace=True)`
# because the `inplace` argument of `set_axis` no longer exists in pandas 2.x.
# Note: 'mental_fitness' holds the "DALYs ... Mental disorders ... (Percent)"
# column, i.e. a disease-burden share.
data.columns = [
    'Country',
    'Year',
    'Schizophrenia',
    'Bipolar_disorder',
    'Eating_disorder',
    'Anxiety',
    'drug_usage',
    'depression',
    'alcohol',
    'mental_fitness',
]
In [12]:
# Confirm the short column names on the first five rows.
data.head(n=5)
Out[12]:
Country Year Schizophrenia Bipolar_disorder Eating_disorder Anxiety drug_usage depression alcohol mental_fitness
0 Afghanistan 1990 0.228979 0.721207 0.131001 4.835127 0.454202 5.125291 0.444036 1.696670
1 Afghanistan 1991 0.228120 0.719952 0.126395 4.821765 0.447112 5.116306 0.444250 1.734281
2 Afghanistan 1992 0.227328 0.718418 0.121832 4.801434 0.441190 5.106558 0.445501 1.791189
3 Afghanistan 1993 0.226468 0.717452 0.117942 4.789363 0.435581 5.100328 0.445958 1.776779
4 Afghanistan 1994 0.225567 0.717012 0.114547 4.784923 0.431822 5.099424 0.445779 1.712986
In [13]:
# Annotated correlation heatmap. Only numeric columns are correlated: the
# string 'Country' column cannot be converted to float, and recent pandas
# versions raise instead of silently skipping it.
plt.figure(figsize=(12, 6))
numeric_corr = data.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, annot=True, cmap='Blues')
# Render the figure (the previous empty plt.plot() call only produced a stray
# `[]` output).
plt.show()
Out[13]:
[]
In [14]:
# Joint distribution with a regression fit: Schizophrenia vs. mental_fitness.
sns.jointplot(
    data=data,
    x='Schizophrenia',
    y='mental_fitness',
    kind='reg',
    color='m',
)
plt.show()
In [15]:
# Joint distribution with a regression fit: Bipolar_disorder vs. mental_fitness.
sns.jointplot(
    data=data,
    x='Bipolar_disorder',
    y='mental_fitness',
    kind='reg',
    color='blue',
)
plt.show()
In [16]:
# Pairwise scatter/histogram grid; corner=True draws only the lower triangle.
sns.pairplot(data=data, corner=True)
plt.show()
In [17]:
# Average of the target column across all rows.
mean = data.loc[:, 'mental_fitness'].mean()
mean
Out[17]:
4.8180618117506135
In [18]:
# Pie chart: one slice per Year, sized by the summed mental_fitness values.
fig = px.pie(
    data_frame=data,
    names='Year',
    values='mental_fitness',
)
fig.show()
3.8%3.78%3.75%3.72%3.7%3.68%3.67%3.63%3.6%3.57%3.52%3.49%3.46%3.42%3.38%3.33%3.29%3.26%3.23%3.18%3.13%3.09%3.04%3%2.95%2.92%2.9%2.87%2.84%2.8%
201920182017201620152014201320122011201020092008200720062005200420032002200120001999199819971996199519941993199219911990
plotly-logomark
In [19]:
# Bar chart of mental_fitness per Year for the first ten rows of the frame only.
first_ten_rows = data.iloc[:10]
fig = px.bar(
    data_frame=first_ten_rows,
    x='Year',
    y='mental_fitness',
    color='Year',
    template='ggplot2',
)
fig.show()
1990199219941996199800.20.40.60.811.21.41.61.8
1990199119921993199419951996199719981999YearYearmental_fitness
plotly-logomark
In [20]:
# One line per Country showing mental_fitness over the years.
# NOTE(review): only two colours are supplied while the frame has many
# countries, so colours presumably repeat across lines — confirm this is
# intended.
fig = px.line(
    data_frame=data,
    x="Year",
    y="mental_fitness",
    color='Country',
    markers=True,
    color_discrete_sequence=['red', 'blue'],
    template='plotly_dark',
)
fig.show()
199019952000200520102015202002468101214
CountryAfghanistanAfrican Region (WHO)AlbaniaAlgeriaAmerican SamoaAndorraAngolaAntigua and BarbudaArgentinaArmeniaAustraliaAustriaAzerbaijanBahamasBahrainBangladeshBarbadosBelarusBelgiumBelizeBeninBermudaBhutanBoliviaBosnia and HerzegovinaBotswanaBrazilBruneiBulgariaBurkina FasoBurundiCambodiaCameroonCanadaCape VerdeCentral African RepublicChadChileChinaColombiaComorosCongoCook IslandsCosta RicaCote d'IvoireCroatiaCubaCyprusCzechiaDemocratic Republic of CongoDenmarkDjiboutiDominicaDominican RepublicEast Asia & Pacific (WB)Eastern Mediterranean Region (WHO)EcuadorEgyptEl SalvadorEnglandEquatorial GuineaEritreaEstoniaEswatiniEthiopiaEurope & Central Asia (WB)European Region (WHO)FijiFinlandFranceG20GabonGambiaGeorgiaGermanyGhanaGreeceGreenlandGrenadaGuamGuatemalaGuineaGuinea-BissauGuyanaHaitiHondurasHungaryIcelandIndiaIndonesiaIranIraqIrelandIsraelItalyJamaicaJapanJordanKazakhstanKenyaKiribatiKuwaitKyrgyzstanLaosLatin America & Caribbean (WB)LatviaLebanonLesothoLiberiaLibyaLithuaniaLuxembourgMadagascarMalawiMalaysiaMaldivesMaliMaltaMarshall IslandsMauritaniaMauritiusMexicoMicronesia (country)Middle East & North Africa (WB)MoldovaMonacoMongoliaMontenegroMoroccoMozambiqueMyanmarNamibiaNauruNepalNetherlandsNew ZealandNicaraguaNigerNigeriaNiueNorth America (WB)North KoreaNorth MacedoniaNorthern IrelandNorthern Mariana IslandsNorwayOECD CountriesOmanPakistanPalauPalestinePanamaPapua New GuineaParaguayPeruPhilippinesPolandPortugalPuerto RicoQatarRegion of the Americas (WHO)RomaniaRussiaRwandaSaint Kitts and NevisSaint LuciaSaint Vincent and the GrenadinesSamoaSan MarinoSao Tome and PrincipeSaudi ArabiaScotlandSenegalSerbiaSeychellesSierra LeoneSingaporeSlovakiaSloveniaSolomon IslandsSomaliaSouth AfricaSouth Asia (WB)South KoreaSouth SudanSouth-East Asia Region (WHO)SpainSri LankaSub-Saharan Africa (WB)SudanSurinameSwedenSwitzerlandSyriaTaiwanTajikistanTanzaniaThailandTimorTogoTokelauTongaTrinidad and TobagoTunisiaTurkeyTurkmenistanTuvaluUgandaUkraineUnited Arab 
EmiratesUnited KingdomUnited StatesUnited States Virgin IslandsUruguayUzbekistanVanuatuVenezuelaVietnamWalesWestern Pacific Region (WHO)WorldWorld Bank High IncomeWorld Bank Low IncomeWorld Bank Lower Middle IncomeWorld Bank Upper Middle IncomeYemenZambiaZimbabweYearmental_fitness
plotly-logomark
In [21]:
# Work on an independent copy so the encoding below leaves `data` untouched.
df = data.copy(deep=True)
In [22]:
# First five rows of the working copy.
df.head(n=5)
Out[22]:
Country Year Schizophrenia Bipolar_disorder Eating_disorder Anxiety drug_usage depression alcohol mental_fitness
0 Afghanistan 1990 0.228979 0.721207 0.131001 4.835127 0.454202 5.125291 0.444036 1.696670
1 Afghanistan 1991 0.228120 0.719952 0.126395 4.821765 0.447112 5.116306 0.444250 1.734281
2 Afghanistan 1992 0.227328 0.718418 0.121832 4.801434 0.441190 5.106558 0.445501 1.791189
3 Afghanistan 1993 0.226468 0.717452 0.117942 4.789363 0.435581 5.100328 0.445958 1.776779
4 Afghanistan 1994 0.225567 0.717012 0.114547 4.784923 0.431822 5.099424 0.445779 1.712986
In [23]:
# Print column dtypes, non-null counts and memory usage of the working copy.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6840 entries, 0 to 6839
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Country           6840 non-null   object 
 1   Year              6840 non-null   int64  
 2   Schizophrenia     6840 non-null   float64
 3   Bipolar_disorder  6840 non-null   float64
 4   Eating_disorder   6840 non-null   float64
 5   Anxiety           6840 non-null   float64
 6   drug_usage        6840 non-null   float64
 7   depression        6840 non-null   float64
 8   alcohol           6840 non-null   float64
 9   mental_fitness    6840 non-null   float64
dtypes: float64(8), int64(1), object(1)
memory usage: 587.8+ KB
In [24]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode every text column of `df`.
# A separate encoder is fitted per column and kept in `encoders`, so the
# original labels can be recovered later with
# `encoders[col].inverse_transform(...)`. Reusing a single encoder would
# overwrite its fitted classes on every column.
# `l` is still bound afterwards (to the most recently used encoder, or to an
# unfitted one when no column needed encoding) for any later cell that uses it.
encoders = {}
l = LabelEncoder()
for i in df.columns:
    # Covers classic object columns as well as pandas' dedicated string dtypes.
    if pd.api.types.is_object_dtype(df[i]) or pd.api.types.is_string_dtype(df[i]):
        l = LabelEncoder()
        df[i] = l.fit_transform(df[i])
        encoders[i] = l
In [25]:
from sklearn.model_selection import train_test_split

# Target is 'mental_fitness'; every other column is used as a feature.
y = df['mental_fitness']
X = df.drop(columns='mental_fitness')

# 80/20 split with a fixed seed so the split is reproducible.
xtrain, xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=2,
)
In [26]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


def _report_metrics(title, y_true, y_pred):
    """Print MSE, RMSE and R2 for one data split and return them.

    Parameters
    ----------
    title : str
        Heading line printed above the metrics.
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Model predictions aligned with ``y_true``.

    Returns
    -------
    tuple
        ``(mse, rmse, r2)`` for the given split.
    """
    mse = mean_squared_error(y_true, y_pred)
    # RMSE is derived from the MSE already computed instead of recomputing it.
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    print(title)
    print("--------------------------------------")
    print('MSE is {}'.format(mse))
    print('RMSE is {}'.format(rmse))
    print('R2 score is {}'.format(r2))
    return mse, rmse, r2


# Fit an ordinary least-squares model on the training split.
lr = LinearRegression()
lr.fit(xtrain, ytrain)

# model evaluation for training set
ytrain_pred = lr.predict(xtrain)
mse, rmse, r2 = _report_metrics("The model performance for training set", ytrain, ytrain_pred)
print("\n")

# model evaluation for testing set
# (mse / rmse / r2 end up holding the test-set values, as before)
ytest_pred = lr.predict(xtest)
mse, rmse, r2 = _report_metrics("The model performance for testing set", ytest, ytest_pred)
The model performance for training set
--------------------------------------
MSE is 1.3899593724057977
RMSE is 1.1789653821914357
R2 score is 0.7413245790025275


The model performance for testing set
--------------------------------------
MSE is 1.1357545319272402
RMSE is 1.0657178481789822
R2 score is 0.7638974087055269
In [ ]: